From: Adam Cozzette Date: Wed, 5 Jan 2022 16:50:29 +0000 (-0800) Subject: [PATCH] Improve performance of parsing unknown fields in Java (#9371) X-Git-Tag: archive/raspbian/3.6.1.3-2+rpi1+deb10u1^2~3 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks:///%22http:/www.example.com/cgi/%22https:/%22bookmarks:/?a=commitdiff_plain;h=b337d6b688c018b1637c59840888af4f7531cd49;p=protobuf.git [PATCH] Improve performance of parsing unknown fields in Java (#9371) Credit should go to @elharo for most of these Java changes--I am just cherry-picking them from our internal codebase. The one thing I did change was to give the UTF-8 validation tests their own Bazel test target. This makes it possible to give the other tests a shorter timeout, which is important for UnknownFieldSetPerformanceTest in particular. Gbp-Pq: Name CVE-2021-22569.patch --- diff --git a/java/core/src/main/java/com/google/protobuf/UnknownFieldSet.java b/java/core/src/main/java/com/google/protobuf/UnknownFieldSet.java index 37d6463..07c8f7c 100644 --- a/java/core/src/main/java/com/google/protobuf/UnknownFieldSet.java +++ b/java/core/src/main/java/com/google/protobuf/UnknownFieldSet.java @@ -43,14 +43,14 @@ import java.util.Map; import java.util.TreeMap; /** - * {@code UnknownFieldSet} is used to keep track of fields which were seen when + * {@code UnknownFieldSet} keeps track of fields which were seen when * parsing a protocol message but whose field numbers or types are unrecognized. * This most frequently occurs when new fields are added to a message type * and then messages containing those fields are read by old software that was * compiled before the new types were added. * *

Every {@link Message} contains an {@code UnknownFieldSet} (and every - * {@link Message.Builder} contains an {@link Builder}). + * {@link Message.Builder} contains a {@link Builder}). * *

Most users will never need to use this class. * @@ -58,8 +58,13 @@ import java.util.TreeMap; */ public final class UnknownFieldSet implements MessageLite { - private UnknownFieldSet() { - fields = null; + private final TreeMap fields; + + /** + * Construct an {@code UnknownFieldSet} around the given map. + */ + UnknownFieldSet(TreeMap fields) { + this.fields = fields; } /** Create a new {@link Builder}. */ @@ -84,18 +89,7 @@ public final class UnknownFieldSet implements MessageLite { return defaultInstance; } private static final UnknownFieldSet defaultInstance = - new UnknownFieldSet(Collections.emptyMap(), - Collections.emptyMap()); - - /** - * Construct an {@code UnknownFieldSet} around the given map. The map is - * expected to be immutable. - */ - UnknownFieldSet(final Map fields, - final Map fieldsDescending) { - this.fields = fields; - } - private final Map fields; + new UnknownFieldSet(new TreeMap()); @Override @@ -109,12 +103,16 @@ public final class UnknownFieldSet implements MessageLite { @Override public int hashCode() { + if (fields.isEmpty()) { // avoid allocation of iterator. + // This optimization may not be helpful but it is needed for the allocation tests to pass. + return 0; + } return fields.hashCode(); } /** Get a map of fields in the set by number. */ public Map asMap() { - return fields; + return (Map) fields.clone(); } /** Check if the given field number is present in the set. */ @@ -201,7 +199,7 @@ public final class UnknownFieldSet implements MessageLite { @Override public void writeDelimitedTo(OutputStream output) throws IOException { final CodedOutputStream codedOutput = CodedOutputStream.newInstance(output); - codedOutput.writeRawVarint32(getSerializedSize()); + codedOutput.writeUInt32NoTag(getSerializedSize()); writeTo(codedOutput); codedOutput.flush(); } @@ -210,8 +208,10 @@ public final class UnknownFieldSet implements MessageLite { @Override public int getSerializedSize() { int result = 0; - for (final Map.Entry entry : fields.entrySet()) { - result += entry.getValue().getSerializedSize(entry.getKey()); + if (!fields.isEmpty()) { + for (Map.Entry entry : fields.entrySet()) { + result += entry.getValue().getSerializedSize(entry.getKey()); + } } return result; } @@ -296,18 +296,11 @@ public final class UnknownFieldSet implements MessageLite { // This constructor should never be called directly (except from 'create'). private Builder() {} - private Map fields; - - // Optimization: We keep around a builder for the last field that was - // modified so that we can efficiently add to it multiple times in a - // row (important when parsing an unknown repeated field). - private int lastFieldNumber; - private Field.Builder lastField; + private TreeMap fieldBuilders = + new TreeMap(); private static Builder create() { - Builder builder = new Builder(); - builder.reinitialize(); - return builder; + return new Builder(); } /** @@ -315,45 +308,33 @@ public final class UnknownFieldSet implements MessageLite { * values that already exist. */ private Field.Builder getFieldBuilder(final int number) { - if (lastField != null) { - if (number == lastFieldNumber) { - return lastField; - } - // Note: addField() will reset lastField and lastFieldNumber. - addField(lastFieldNumber, lastField.build()); - } if (number == 0) { return null; } else { - final Field existing = fields.get(number); - lastFieldNumber = number; - lastField = Field.newBuilder(); - if (existing != null) { - lastField.mergeFrom(existing); + Field.Builder builder = fieldBuilders.get(number); + if (builder == null) { + builder = Field.newBuilder(); + fieldBuilders.put(number, builder); } - return lastField; + return builder; } } /** * Build the {@link UnknownFieldSet} and return it. - * - *

Once {@code build()} has been called, the {@code Builder} will no - * longer be usable. Calling any method after {@code build()} will result - * in undefined behavior and can cause a {@code NullPointerException} to be - * thrown. */ @Override public UnknownFieldSet build() { - getFieldBuilder(0); // Force lastField to be built. final UnknownFieldSet result; - if (fields.isEmpty()) { + if (fieldBuilders.isEmpty()) { result = getDefaultInstance(); } else { - Map descendingFields = null; - result = new UnknownFieldSet(Collections.unmodifiableMap(fields), descendingFields); + TreeMap fields = new TreeMap(); + for (Map.Entry entry : fieldBuilders.entrySet()) { + fields.put(entry.getKey(), entry.getValue().build()); + } + result = new UnknownFieldSet(fields); } - fields = null; return result; } @@ -365,10 +346,13 @@ public final class UnknownFieldSet implements MessageLite { @Override public Builder clone() { - getFieldBuilder(0); // Force lastField to be built. - Map descendingFields = null; - return UnknownFieldSet.newBuilder().mergeFrom( - new UnknownFieldSet(fields, descendingFields)); + Builder clone = UnknownFieldSet.newBuilder(); + for (Map.Entry entry : fieldBuilders.entrySet()) { + Integer key = entry.getKey(); + Field.Builder value = entry.getValue(); + clone.fieldBuilders.put(key, value.clone()); + } + return clone; } @Override @@ -376,31 +360,24 @@ public final class UnknownFieldSet implements MessageLite { return UnknownFieldSet.getDefaultInstance(); } - private void reinitialize() { - fields = Collections.emptyMap(); - lastFieldNumber = 0; - lastField = null; - } - /** Reset the builder to an empty set. */ @Override public Builder clear() { - reinitialize(); + fieldBuilders = new TreeMap(); return this; } - /** Clear fields from the set with a given field number. */ + /** + * Clear fields from the set with a given field number. + * + * @throws IllegalArgumentException if number is not positive + */ public Builder clearField(final int number) { - if (number == 0) { - throw new IllegalArgumentException("Zero is not a valid field number."); + if (number <= 0) { + throw new IllegalArgumentException(number + " is not a valid field number."); } - if (lastField != null && lastFieldNumber == number) { - // Discard this. - lastField = null; - lastFieldNumber = 0; - } - if (fields.containsKey(number)) { - fields.remove(number); + if (fieldBuilders.containsKey(number)) { + fieldBuilders.remove(number); } return this; } @@ -422,10 +399,12 @@ public final class UnknownFieldSet implements MessageLite { /** * Add a field to the {@code UnknownFieldSet}. If a field with the same * number already exists, the two are merged. + * + * @throws IllegalArgumentException if number is not positive */ public Builder mergeField(final int number, final Field field) { - if (number == 0) { - throw new IllegalArgumentException("Zero is not a valid field number."); + if (number <= 0) { + throw new IllegalArgumentException(number + " is not a valid field number."); } if (hasField(number)) { getFieldBuilder(number).mergeFrom(field); @@ -442,10 +421,12 @@ public final class UnknownFieldSet implements MessageLite { * Convenience method for merging a new field containing a single varint * value. This is used in particular when an unknown enum value is * encountered. + * + * @throws IllegalArgumentException if number is not positive */ public Builder mergeVarintField(final int number, final int value) { - if (number == 0) { - throw new IllegalArgumentException("Zero is not a valid field number."); + if (number <= 0) { + throw new IllegalArgumentException(number + " is not a valid field number."); } getFieldBuilder(number).addVarint(value); return this; @@ -456,11 +437,13 @@ public final class UnknownFieldSet implements MessageLite { * Convenience method for merging a length-delimited field. * *

For use by generated code only. + * + * @throws IllegalArgumentException if number is not positive */ public Builder mergeLengthDelimitedField( final int number, final ByteString value) { - if (number == 0) { - throw new IllegalArgumentException("Zero is not a valid field number."); + if (number <= 0) { + throw new IllegalArgumentException(number + " is not a valid field number."); } getFieldBuilder(number).addLengthDelimited(value); return this; @@ -468,29 +451,20 @@ public final class UnknownFieldSet implements MessageLite { /** Check if the given field number is present in the set. */ public boolean hasField(final int number) { - if (number == 0) { - throw new IllegalArgumentException("Zero is not a valid field number."); - } - return number == lastFieldNumber || fields.containsKey(number); + return fieldBuilders.containsKey(number); } /** * Add a field to the {@code UnknownFieldSet}. If a field with the same * number already exists, it is removed. + * + * @throws IllegalArgumentException if number is not positive */ public Builder addField(final int number, final Field field) { - if (number == 0) { - throw new IllegalArgumentException("Zero is not a valid field number."); - } - if (lastField != null && lastFieldNumber == number) { - // Discard this. - lastField = null; - lastFieldNumber = 0; - } - if (fields.isEmpty()) { - fields = new TreeMap(); + if (number <= 0) { + throw new IllegalArgumentException(number + " is not a valid field number."); } - fields.put(number, field); + fieldBuilders.put(number, Field.newBuilder(field)); return this; } @@ -499,7 +473,10 @@ public final class UnknownFieldSet implements MessageLite { * fields are added, the changes may or may not be reflected in this map. */ public Map asMap() { - getFieldBuilder(0); // Force lastField to be built. + TreeMap fields = new TreeMap(); + for (Map.Entry entry : fieldBuilders.entrySet()) { + fields.put(entry.getKey(), entry.getValue().build()); + } return Collections.unmodifiableMap(fields); } @@ -871,54 +848,85 @@ public final class UnknownFieldSet implements MessageLite { *

Use {@link Field#newBuilder()} to construct a {@code Builder}. */ public static final class Builder { - // This constructor should never be called directly (except from 'create'). - private Builder() {} + // This constructor should only be called directly from 'create' and 'clone'. + private Builder() { + result = new Field(); + } private static Builder create() { Builder builder = new Builder(); - builder.result = new Field(); return builder; } private Field result; + @Override + public Builder clone() { + Field copy = new Field(); + if (result.varint == null) { + copy.varint = null; + } else { + copy.varint = new ArrayList(result.varint); + } + if (result.fixed32 == null) { + copy.fixed32 = null; + } else { + copy.fixed32 = new ArrayList(result.fixed32); + } + if (result.fixed64 == null) { + copy.fixed64 = null; + } else { + copy.fixed64 = new ArrayList(result.fixed64); + } + if (result.lengthDelimited == null) { + copy.lengthDelimited = null; + } else { + copy.lengthDelimited = new ArrayList(result.lengthDelimited); + } + if (result.group == null) { + copy.group = null; + } else { + copy.group = new ArrayList(result.group); + } + + Builder clone = new Builder(); + clone.result = copy; + return clone; + } + /** - * Build the field. After {@code build()} has been called, the - * {@code Builder} is no longer usable. Calling any other method will - * result in undefined behavior and can cause a - * {@code NullPointerException} to be thrown. + * Build the field. */ public Field build() { + Field built = new Field(); if (result.varint == null) { - result.varint = Collections.emptyList(); + built.varint = Collections.emptyList(); } else { - result.varint = Collections.unmodifiableList(result.varint); + built.varint = Collections.unmodifiableList(new ArrayList(result.varint)); } if (result.fixed32 == null) { - result.fixed32 = Collections.emptyList(); + built.fixed32 = Collections.emptyList(); } else { - result.fixed32 = Collections.unmodifiableList(result.fixed32); + built.fixed32 = Collections.unmodifiableList(new ArrayList(result.fixed32)); } if (result.fixed64 == null) { - result.fixed64 = Collections.emptyList(); + built.fixed64 = Collections.emptyList(); } else { - result.fixed64 = Collections.unmodifiableList(result.fixed64); + built.fixed64 = Collections.unmodifiableList(new ArrayList(result.fixed64)); } if (result.lengthDelimited == null) { - result.lengthDelimited = Collections.emptyList(); + built.lengthDelimited = Collections.emptyList(); } else { - result.lengthDelimited = - Collections.unmodifiableList(result.lengthDelimited); + built.lengthDelimited = Collections.unmodifiableList( + new ArrayList(result.lengthDelimited)); } if (result.group == null) { - result.group = Collections.emptyList(); + built.group = Collections.emptyList(); } else { - result.group = Collections.unmodifiableList(result.group); + built.group = Collections.unmodifiableList(new ArrayList(result.group)); } - final Field returnMe = result; - result = null; - return returnMe; + return built; } /** Discard the field's contents. */